



*************************************************************************
* Generate a panel with 2007 & 2012 naics code, 1990-2016
*************************************************************************

* 1. import all NAICS code
import excel using "$crosswalks/2012_to_2007_NAICS.xls", cellrange(A3:D617) firstrow clear
rename C naics2007
rename NAICSCode naics2012
rename NAICSTitle industryname2012
rename NAICSTitleandspecificp industryname2007
drop if naics2007>= 400000
drop if naics2007< 300000
sum
order naics2007 naics2012 industryname2007 industryname2012

* 2. merge in the 98 industries 
preserve
	use "$crosswalks/crosswalk_by_naics_year.dta", clear
	keep census_naics naics_2007 naics_2012
	rename naics_2007 naics2007
	duplicates drop
	tempfile censuscode
	save  `censuscode.dta', replace
restore

merge m:1 naics2007 using `censuscode.dta'
count if naics_2012 != naics2012 &  naics_2012 !=.
drop _merge naics_2012
order census_naics


local new = _N + 1
set obs `new'
replace naics2007 = 31131 if naics2007 == .
replace naics2012 = 31131 if naics2012 == .
replace census_naics = 31131 if naics2007 == 31131


local new = _N + 1
set obs `new'
replace naics2007 = 31181 if naics2007 == .
replace naics2012 = 31181 if naics2012 == .
replace census_naics = 31181 if naics2007 == 31181


* 3. make sure naics2007 is unique 

* 334119 corresponds to 333316 & 334118. I decide to drop the first. 
* Graphs of the analysis behind this decision is generated in the Appendix section of this code
drop if naics2007 == 334119 & naics2012 == 333316
 
* 4. expand to 27 years (1990-2016)
expand 27
bysort naics2007: gen year = _n + 1989



/*************************************************************************
* Appendix I: 
* 333316 - 333315 
* 334118 = 334113 + 334119
*************************************************************************

**********************************
* I. check employment & wage 
**********************************

use "$QCEW/QCEW_state_ind_year.dta", clear
* 1. generate state string code
preserve
	import delimited using "$QCEW/codebook/state_titles.csv", varn(1) clear
	tempfile state_titles
	save `state_titles.dta', replace
restore

merge m:1 state_code using `state_titles.dta'
keep if _merge == 3
drop _merge

* 
rename NAICS_code naics
local s "US"
foreach y  in total_annual_wages annual_avg_emplvl {
* annual_avg_emplvl & total_annual_wages
graph twoway (connected `y' year if naics == 334119 & state == "`s'") ///
			(connected `y' year if naics == 334113 & state == "`s'") ///
			(connected `y' year if naics == 333315 & state == "`s'") ///
			(connected `y' year if naics == 334118 & state == "`s'") ///
			(connected `y' year if naics == 333316 & state == "`s'"), ///
			legend( order(1 "334119 *" 2 "334113" 3 "333315" 4 "334118" 5 "333316") col(3))
}
*
* 333315 & 333316 seems to be the same industry
* 334113 + 334119 = 334118

**********************************
* II. check ASM energy expenditure
**********************************

use "$ASMenergy/ASM.dta", clear

rename naicsid naics
sort naics year
foreach y  in shipment cst_fuel cst_elec q_elec_pch   {
* shipment cst_fuel cst_elec q_elec_pch 
graph twoway (connected `y' year if naics == 334119) ///
			(connected `y' year if naics == 334113 ) ///
			(connected `y' year if naics == 333315 ) ///
			(connected `y' year if naics == 334118 ) ///
			(connected `y' year if naics == 333316 ), ///
			legend( order(1 "334119 *" 2 "334113" 3 "333315" 4 "334118" 5 "333316") col(3))
}
*









